-- | Parse reStructuredText and return Pandoc document.
module Text.Pandoc.Readers.RST ( 
                                readRST
                               ) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Shared 
import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
import List ( findIndex )
import Char ( toUpper )

-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
readRST = readWith parseRST

-- | Parse a string and print result (for testing).
testString :: String -> IO ()
testString = testStringWith parseRST

--
-- Constants and data structure definitions
---

bulletListMarkers = "*+-"
underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"

-- treat these as potentially non-text when parsing inline:
specialChars = "\\`|*_<>$:[-"

--
-- parsing documents
--

isAnonKeyBlock block = case block of
                         (Key [Str "_"] str) -> True
                         otherwise -> False

isNotAnonKeyBlock block = not (isAnonKeyBlock block)

isHeader1 :: Block -> Bool
isHeader1 (Header 1 _) = True
isHeader1 _ = False

isHeader2 :: Block -> Bool
isHeader2 (Header 2 _) = True
isHeader2 _ = False

-- | Promote all headers in a list of blocks.  (Part of
-- title transformation for RST.)
promoteHeaders :: Int -> [Block] -> [Block]
promoteHeaders num ((Header level text):rest) = 
    (Header (level - num) text):(promoteHeaders num rest)
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
promoteHeaders num [] = []

-- | If list of blocks starts with a header (or a header and subheader)
-- of level that are not found elsewhere, return it as a title and
-- promote all the other headers. 
titleTransform :: [Block]              -- ^ list of blocks
               -> ([Block], [Inline])  -- ^ modified list of blocks, title
titleTransform ((Header 1 head1):(Header 2 head2):rest) =  -- title and subtitle
    if (any isHeader1 rest) || (any isHeader2 rest) then
        ((Header 1 head1):(Header 2 head2):rest, [])
      else
        ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) =       -- title, no subtitle
    if (any isHeader1 rest) then
        ((Header 1 head1):rest, [])
      else
        ((promoteHeaders 1 rest), head1)
titleTransform blocks = (blocks, [])

parseRST = do
  state <- getState
  input <- getInput
  blocks <- parseBlocks   -- first pass
  let anonymousKeys = filter isAnonKeyBlock blocks
  let blocks' = if (null anonymousKeys) then
                    blocks
                  else -- run parser again to fill in anonymous links...
                    case runParser parseBlocks (state { stateKeyBlocks = anonymousKeys }) 
                             "RST source, second pass" input of
                      Left err -> error $ "\nError:\n" ++ show err
                      Right result -> (filter isNotAnonKeyBlock result)
  let (blocks'', title) = if stateStandalone state then
                              titleTransform blocks'
                            else
                              (blocks', [])
  state <- getState
  let authors = stateAuthors state
  let date = stateDate state
  let title' = if (null title) then (stateTitle state) else title
  return (Pandoc (Meta title' authors date) blocks'')

--
-- parsing blocks
--

parseBlocks = do
  result <- manyTill block eof
  return result

block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, referenceKey, 
                 imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock,
                 para, plain, blankBlock, nullBlock ] <?> "block"

--
-- field list
--

fieldListItem = try (do
  char ':'
  name <- many1 alphaNum
  string ": "
  skipSpaces
  first <- manyTill anyChar newline
  rest <- many (do{ notFollowedBy (char ':'); 
                    notFollowedBy blankline; 
                    skipSpaces;
                    manyTill anyChar newline })
  return (name, (joinWithSep " " (first:rest)))) 

fieldList = try (do
  items <- many1 fieldListItem
  blanklines
  let authors = case (lookup "Authors" items) of
                   Just auth -> [auth]
                   Nothing   -> map snd (filter (\(x,y) -> x == "Author") items)
  let date = case (lookup "Date" items) of
                   Just dat  -> dat
                   Nothing   -> ""
  let title = case (lookup "Title" items) of
                    Just tit  -> [Str tit]
                    Nothing   -> []
  let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") && 
                                    (x /= "Title")) items
  let result = map (\(x,y) -> Para [Strong [Str x], Str ":", Space, Str y]) remaining
  updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title })
  return (BlockQuote result))

--
-- line block
--

lineBlockLine = try (do
  string "| "
  white <- many (oneOf " \t")
  line <- manyTill inline newline
  let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
  return line')

lineBlock = try (do
  lines <- many1 lineBlockLine
  blanklines
  return $ Para (concat lines))

--
-- paragraph block
--

para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"

codeBlockStart = try (do
                        string "::"
                        blankline
                        blankline)

-- paragraph that ends in a :: starting a code block
paraBeforeCodeBlock = try (do
  result <- many1 (do {notFollowedBy' codeBlockStart; inline})
  followedBy' (string "::")
  return (Para (if (last result == Space) then 
                    normalizeSpaces result
                else 
                    (normalizeSpaces result) ++ [Str ":"])))

-- regular paragraph
paraNormal = try (do 
  result <- many1 inline
  newline
  blanklines
  let result' = normalizeSpaces result
  return (Para result'))

plain = do
  result <- many1 inline
  let result' = normalizeSpaces result
  return (Plain result')

--
-- image block
--

imageBlock = try (do
    string ".. image:: "
    src <- manyTill anyChar newline
    return (Plain [Image [Str "image"] (Src src "")]))

--
-- header blocks
--

header = choice [ doubleHeader, singleHeader ] <?> "header"

-- a header with lines on top and bottom
doubleHeader = try (do
    c <- oneOf underlineChars
    rest <- many (char c)  -- the top line
    let lenTop = length (c:rest)
    skipSpaces
    newline
    txt <- many1 (do {notFollowedBy blankline; inline})
    pos <- getPosition  
    let len = (sourceColumn pos) - 1
    if (len > lenTop) then fail "title longer than border" else (do {return ()})
    blankline              -- spaces and newline
    count lenTop (char c)  -- the bottom line
    blanklines
    -- check to see if we've had this kind of header before.  
    -- if so, get appropriate level.  if not, add to list.
    state <- getState
    let headerTable = stateHeaderTable state
    let (headerTable', level) =  case findIndex (== DoubleHeader c) headerTable of
         Just ind    -> (headerTable, ind + 1)
         Nothing     -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
    setState (state { stateHeaderTable = headerTable' })
    return (Header level (normalizeSpaces txt)))

-- a header with line on the bottom only
singleHeader = try (do 
    notFollowedBy' whitespace
    txt <- many1 (do {notFollowedBy blankline; inline})
    pos <- getPosition
    let len = (sourceColumn pos) - 1
    blankline
    c <- oneOf underlineChars
    rest <- count (len - 1) (char c)
    many (char c)
    blanklines
    state <- getState
    let headerTable = stateHeaderTable state
    let (headerTable', level) =  case findIndex (== SingleHeader c) headerTable of
         Just ind    -> (headerTable, ind + 1)
         Nothing     -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
    setState (state { stateHeaderTable = headerTable' })
    return (Header level (normalizeSpaces txt)))

--
-- hrule block
--

hruleWith chr = 
    try (do
           count 4 (char chr)
           skipMany (char chr)
           skipSpaces
           newline
           blanklines
           return HorizontalRule)

hrule = choice (map hruleWith underlineChars) <?> "hrule"

--
-- code blocks
--

-- read a line indented by a given string
indentedLine indents = try (do
    string indents
    result <- manyTill anyChar newline
    return (result ++ "\n"))

-- two or more indented lines, possibly separated by blank lines
-- if variable = True, then any indent will work, but it must be consistent through the block
-- if variable = False, indent should be one tab or equivalent in spaces
indentedBlock variable = try (do 
  state <- getState
  let tabStop = stateTabStop state
  indents <- if variable then
                 many1 (oneOf " \t")
             else
                 oneOfStrings ["\t", (replicate tabStop ' ')]
  firstline <- manyTill anyChar newline
  rest <- many (choice [ indentedLine indents, 
                         try (do {b <- blanklines; l <- indentedLine indents; return (b ++ l)})])
  option "" blanklines
  return (firstline ++ "\n" ++ (concat rest)))

codeBlock = try (do
    codeBlockStart
    result <- indentedBlock False  -- the False means we want one tab stop indent on each line
    return (CodeBlock result))

--
-- raw html
--

rawHtmlBlock = try (do
    string ".. raw:: html"
    blanklines
    result <- indentedBlock True
    return (RawHtml result))

--
-- raw latex
--

rawLaTeXBlock = try (do
    string ".. raw:: latex"
    blanklines
    result <- indentedBlock True
    return (Para [(TeX result)]))

--
-- block quotes
--

blockQuote = try (do
    block <- indentedBlock True
    -- parse the extracted block, which may contain various block elements:
    state <- getState
    let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) 
                  "block" (block ++ "\n\n") of
                   Left err -> error $ "Raw block:\n" ++ show block ++ "\nError:\n" ++ show err
                   Right result -> result
    return (BlockQuote parsed))

--
-- list blocks
--

list = choice [ bulletList, orderedList ] <?> "list"

-- parses bullet list start and returns its length (inc. following whitespace)
bulletListStart = 
    try (do
           notFollowedBy' hrule  -- because hrules start out just like lists
           marker <- oneOf bulletListMarkers
           white <- many1 spaceChar
           let len = length (marker:white)
           return len) 

withPeriodSuffix parser = try (do
                                 a <- parser
                                 b <- char '.'
                                 return (a ++ [b]))

withParentheses parser = try (do
                                a <- char '('
                                b <- parser
                                c <- char ')'
                                return ([a] ++ b ++ [c]))

withRightParen parser = try (do
                               a <- parser
                               b <- char ')'
                               return (a ++ [b]))

upcaseWord = map toUpper

romanNumeral = do
  let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ]
  let upperNumerals = map upcaseWord lowerNumerals
  result <- choice $ map string (lowerNumerals ++ upperNumerals)
  return result

orderedListEnumerator = choice [ many1 digit, 
                                 string "#", 
                                 count 1 letter, 
                                 romanNumeral ]

-- parses ordered list start and returns its length (inc. following whitespace)
orderedListStart = 
    try (do
           marker <- choice [ withPeriodSuffix orderedListEnumerator, 
                              withParentheses orderedListEnumerator, 
                              withRightParen orderedListEnumerator ]
           white <- many1 spaceChar
           let len = length (marker ++ white)
           return len)

-- parse a line of a list item
listLine markerLength = try (do
  notFollowedBy blankline
  indentWith markerLength
  line <- manyTill anyChar newline
  return (line ++ "\n"))

-- indent by specified number of spaces (or equiv. tabs)
indentWith num = do
  state <- getState
  let tabStop = stateTabStop state
  if (num < tabStop) then
      count num  (char ' ')
    else
      choice [ try (count num (char ' ')), 
               (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] 

-- parse raw text for one list item, excluding start marker and continuations
rawListItem start = 
    try (do
           markerLength <- start
           firstLine <- manyTill anyChar newline
           restLines <- many (listLine markerLength)
           return (markerLength, (firstLine ++ "\n" ++ (concat restLines))))

-- continuation of a list item - indented and separated by blankline or (in compact lists) 
-- endline.  Note: nested lists are parsed as continuations.
listContinuation markerLength = 
    try (do
           blanks <- many1 blankline
           result <- many1 (listLine markerLength)
           return (blanks ++ (concat result)))

listItem start = 
    try (do 
           (markerLength, first) <- rawListItem start
           rest <- many (listContinuation markerLength)
           blanks <- choice [ try (do {b <- many blankline; followedBy' start; return b}), 
                              many1 blankline ]  -- whole list must end with blank
           -- parsing with ListItemState forces markers at beginning of lines to
           -- count as list item markers, even if not separated by blank space.
           -- see definition of "endline"
           state <- getState
           let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) 
                        "list item" raw of
                          Left err -> error $ "Raw:\n" ++ raw ++ "\nError:\n" ++ show err
                          Right result -> result
                   where raw = concat (first:rest) ++ blanks 
           return parsed)

orderedList = 
    try (do
           items <- many1 (listItem orderedListStart)
           let items' = compactify items
           return (OrderedList items'))

bulletList = 
    try (do
           items <- many1 (listItem bulletListStart)
           let items' = compactify items
           return (BulletList items'))

--
-- unknown directive (e.g. comment)
--

unknownDirective = try (do
    string ".. "
    manyTill anyChar newline
    many (do {string "   "; 
              char ':'; 
              many1 (noneOf "\n:"); 
              char ':'; 
              many1 (noneOf "\n"); 
              newline})
    option "" blanklines
    return Null)

-- 
-- reference key
--

referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]

imageKey = try (do
     string ".. |"
     ref <- manyTill inline (char '|')
     skipSpaces
     string "image::"
     src <- manyTill anyChar newline
     return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))

anonymousKey = try (do
     choice [string ".. __:", string "__"]
     skipSpaces
     src <- manyTill anyChar newline
     state <- getState
     return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))

regularKeyQuoted = try (do
     string ".. _`"
     ref <- manyTill inline (string "`:")
     skipSpaces
     src <- manyTill anyChar newline
     return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))

regularKey = try (do
     string ".. _"
     ref <- manyTill inline (char ':')
     skipSpaces
     src <- manyTill anyChar newline
     return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))

 -- 
 -- inline
 --

text = choice [ strong, emph, code, str, tabchar, whitespace, endline ] <?> "text"

inline = choice [ escapedChar, special, hyphens, text, symbol ] <?> "inline"

special = choice [ link, image ] <?> "link, inline html, or image"

hyphens = try (do
  result <- many1 (char '-')
  option Space endline   -- don't want to treat endline after hyphen or dash as a space
  return (Str result))

escapedChar = escaped anyChar

symbol = do 
  result <- oneOf specialChars
  return (Str [result])

-- parses inline code, between codeStart and codeEnd
code = 
    try (do 
           string "``"
           result <- manyTill anyChar (string "``")
           let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
           return (Code result'))

emph = do
  result <- enclosed (char '*') (char '*') inline
  return (Emph (normalizeSpaces result))

strong = do
  result <- enclosed (string "**") (string "**") inline
  return (Strong (normalizeSpaces result))

whitespace = do
  many1 spaceChar <?> "whitespace"
  return Space

tabchar = do
  tab
  return (Str "\t")

str = do 
  notFollowedBy' oneWordReferenceLink
  result <- many1 (noneOf (specialChars ++ "\t\n "))
  return (Str result)

-- an endline character that can be treated as a space, not a structural break
endline =
    try (do
           newline
           notFollowedBy blankline
           -- parse potential list starts at beginning of line differently if in a list:
           st <- getState
           if ((stateParserContext st) == ListItemState) then 
               notFollowedBy' (choice [orderedListStart, bulletListStart])
             else 
               option () pzero
           return Space)

--
-- links
--

link = choice [explicitLink, referenceLink, autoLink, oneWordReferenceLink] <?> "link"

explicitLink = 
    try (do
           char '`'
           label <- manyTill inline (try (do {spaces; char '<'}))
           src <- manyTill (noneOf ">\n ") (char '>')
           skipSpaces
           string "`_"
           return (Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace src) ""))) 

anonymousLinkEnding = 
    try (do
           char '_'
           state <- getState
           let anonKeys = stateKeyBlocks state
           -- if there's a list of anon key refs (from previous pass), pop one off.
           -- otherwise return an anon key ref for the next pass to take care of...
           case anonKeys of
             (Key [Str "_"] src):rest -> 
                 do{ setState (state { stateKeyBlocks = rest }); 
                     return src }
             otherwise -> return (Ref [Str "_"]))

referenceLink = 
    try (do
           char '`'
           label <- manyTill inline (string "`_")
           src <- option (Ref []) anonymousLinkEnding
           return (Link (normalizeSpaces label) src)) 

oneWordReferenceLink =
    try (do
           label <- many1 alphaNum
           char '_'
           src <- option (Ref []) anonymousLinkEnding
           notFollowedBy alphaNum    -- because this_is_not a link
           return (Link [Str label] src)) 

uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", 
                           "news:", "telnet:" ]

uri = try (do
    scheme <- uriScheme
    identifier <- many1 (noneOf " \t\n")
    return (scheme ++ identifier))

autoURI = try (do
    src <- uri
    return (Link [Str src] (Src src "")))

emailChar = alphaNum <|> oneOf "-+_."

emailAddress = try (do
    firstLetter <- alphaNum
    restAddr <- many emailChar
    let addr = firstLetter:restAddr
    char '@'
    dom <- domain
    return (addr ++ '@':dom))

domainChar = alphaNum <|> char '-'

domain = try (do
    first <- many1 domainChar
    dom <- many1 (try (do{ char '.'; many1 domainChar }))
    return (joinWithSep "." (first:dom)))

autoEmail = try (do
    src <- emailAddress
    return (Link [Str src] (Src ("mailto:" ++ src) "")))

autoLink = autoURI <|> autoEmail

-- For now, we assume that all substitution references are for images.
image = 
    try (do
           char '|'
           ref <- manyTill inline (char '|')
           return (Image (normalizeSpaces ref) (Ref ref))) 
